/*****************************************************************************************
* MERGING INDIVIDUAL FILES FROM HARMONISED BHPS AND UKHLS IN LONG FORMAT                 *
* To match individual level files from the harmonised BHPS and Understanding Society     *
* in long format, you need to remove the wave prefixes in the two sets of files and      *
* generate a wave identifier that works across both sets of files. The pidp will         *
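* work as the unique cross-wave identifier across both sets of files. NOTE: as written,  *
* the BHPS-only filter (keep if pid>0) is commented out and UKHLS Wave 1 is included, so *
* respondents who joined as part of Understanding Society are kept as well. Re-enable    *
* that filter and start the UKHLS wave list at Wave 2 to keep BHPS members only.         *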
*****************************************************************************************/

// working directory: all temporary and output files are written here
cd "SET_YOUR_PATH_FOR_OUTPUT/BHPS/combined"

// assign global macro to refer to Understanding Society data
global ukhls "SET_YOUR_PATH_FOR_INPUT/BHPS/UKHLS"

// assign global macro to refer to BHPS data
global bhps "SET_YOUR_PATH_FOR_INPUT/BHPS/BHPS"

// assign global macros for the lists of waves (one letter per wave)
global BHPSwaves "a b c d e f g h i j k l m n o p q r"

// UKHLS waves to include - extend this list as new waves are released.
// The list currently starts at Wave 1 (a). BHPS respondents did not take
// part in Wave 1, so start the list at b if you only want respondents
// who appear in both surveys.
global UKHLSwaves_bh "a b c d e f g h i j"

// number of the last UKHLS wave in use, taken from the alphabet position of
// the last letter in UKHLSwaves_bh so that it cannot drift out of step with
// the list when waves are added (evaluates to 10 for the list above)
local ukhls_n : word count $UKHLSwaves_bh
local ukhls_last : word `ukhls_n' of $UKHLSwaves_bh
global UKHLSno = strpos("abcdefghijklmnopqrstuvwxyz", "`ukhls_last'")

// prepare one temporary file per BHPS wave: prefix stripped, wave number added
foreach w in $BHPSwaves {

	// variables in this wave's file carry the prefix b + wave letter + underscore
	local stub "b`w'_"

	// load the individual-level file for this wave
	use "${bhps}/`stub'indresp_hh", clear

	// strip the wave prefix so variable names line up across waves
	rename `stub'* *

	// wave number = position of the wave letter in the alphabet (a=1 ... r=18)
	generate wave = strpos("abcdefghijklmnopqrstuvwxyz", "`w'")

	// keep a temporary copy to be appended later
	save "tmp_`stub'indresp_hh", replace
}

// prepare one temporary file per Understanding Society wave in UKHLSwaves_bh
foreach w in $UKHLSwaves_bh {

	// load the individual-level file for this wave
	use "${ukhls}/`w'_indresp_hh", clear

	// BHPS-only restriction - currently DISABLED, so all respondents are kept.
	// Individuals have pid==-8 (inapplicable) if they were not part of BHPS;
	// uncomment the next two lines to keep former BHPS members only and then
	// drop the pid variable.
	*keep if pid>0
	*drop pid

	// strip the wave prefix so variable names line up across waves
	rename `w'_* *

	// wave number = alphabet position of the wave letter + 18, so the UKHLS
	// waves continue after the 18 BHPS waves (UKHLS wave 1 -> 19, wave 2 -> 20)
	generate wave = strpos("abcdefghijklmnopqrstuvwxyz", "`w'") + 18

	// keep a temporary copy to be appended later
	save "tmp_`w'_indresp_hh", replace
}

// stack the BHPS wave files: wave a starts the dataset, the others are appended
foreach w in $BHPSwaves {

	// wave a: open it as the base dataset and move on to the next wave
	if "`w'" == "a" {
		use "tmp_ba_indresp_hh", clear
		continue
	}

	// every other wave is added underneath what is already in memory
	append using "tmp_b`w'_indresp_hh"
}

// then add every Understanding Society wave file listed in UKHLSwaves_bh
foreach w in $UKHLSwaves_bh {
	append using "tmp_`w'_indresp_hh"
}

// build the value label for wave
// values 1-18 are the BHPS waves
forvalues n = 1/18 {
	label define wave `n' "BHPS Wave `n'", modify
}

// values 19 onwards are the UKHLS waves; UKHLSno gives the last UKHLS wave,
// and each UKHLS wave number is shifted by 18 to get its value of wave
forvalues n = 1/$UKHLSno {
	label define wave `=`n'+18' "UKHLS Wave `n'", modify
}

// attach the label to the wave variable
label values wave wave

// check how many observations are available from each wave
tabulate wave

// write out the combined long-format file
save "all_indresp_hh", replace

// remove the per-wave temporary files now that the combined file is saved
// collect the file name stubs first: BHPS waves, then UKHLS waves
local tmpstubs
foreach w of global BHPSwaves {
	local tmpstubs `tmpstubs' tmp_b`w'
}
foreach w of global UKHLSwaves_bh {
	local tmpstubs `tmpstubs' tmp_`w'
}

// erase them one by one, in the order they were collected
foreach stub of local tmpstubs {
	erase "`stub'_indresp_hh.dta"
}




**** Derive a time-constant migrant flag (via wide format) and add it to the long file

// load only the three variables needed for the flag
use pidp wave plbornc using all_indresp_hh, clear

// wave-specific indicator: plbornc==-8 -> 0 (no migrant), -2 to 97 -> 1 (migrant),
// any other value -> missing
// NOTE(review): the range -2/97 also covers the codes -2, -1 and 0 - presumably
// deliberate, confirm against the plbornc codebook
recode plbornc (-8=0 "no migrant")(-2/97=1 "migrant")(else=.), gen(mig)
keep wave pidp mig

// remember which wave numbers are present (ascending order) before reshaping,
// so the fill loop below follows the data instead of a hard-coded 1-28 list
levelsof wave, local(wavelist)

// one row per person, one mig variable per wave
reshape wide mig, i(pidp) j(wave)

// migrant = first non-missing wave-specific value, searching in wave order
gen migrant = .
foreach k of local wavelist {
	replace migrant = mig`k' if migrant==.
}

// the data are already one row per pidp, so just keep the person-level flag
keep pidp migrant

// attach the flag to every person-wave row of the long file;
// nogenerate stops _merge being saved, which would make later merges fail
merge 1:m pidp using "all_indresp_hh.dta", nogenerate

save all_indresp_hh, replace
















